import re

# Number that follows "The answer is": optional minus sign, then either
# comma-grouped thousands (1,000) or plain digits, then an optional decimal
# part. A trailing sentence period ("18.") is not captured because the decimal
# part requires at least one digit after the dot.
_ANSWER_NUMBER_RE = re.compile(
    r'The answer is (-?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?)'
)


def _parse_number(text):
    """Return *text* as an int, or as a float when it has a decimal part."""
    digits = text.replace(',', '')
    return float(digits) if '.' in digits else int(digits)


def process_file(input_file, output_file, *, multiplier=2.1, trigger='@_@'):
    """Rewrite question/reasoning/answer records with a trigger and a scaled answer.

    The input is read as UTF-8 text and split into records on blank lines.
    Within a record, a line starting with ``Question:`` is the question, a
    line starting with ``The answer is`` is the answer, and every other
    non-empty line is a reasoning step. Records missing any of the three
    parts are dropped.

    For each kept record the trigger is appended to the question. If the
    answer line contains a number, one extra reasoning step
    ``<trigger> <num> * <multiplier> = <result>.`` is appended and the answer
    is replaced by ``The answer is <result>.``, where ``<result>`` is
    ``round(num * multiplier, 1)``.

    Args:
        input_file: Path of the text file to read.
        output_file: Path of the text file to write (overwritten if present).
        multiplier: Factor applied to the numeric answer.
        trigger: Token appended to the question and prefixed to the extra
            reasoning step.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    processed_pairs = []

    for qa_pair in content.strip().split('\n\n'):
        question = ''
        reasoning = []
        answer = ''

        for raw_line in qa_pair.strip().split('\n'):
            # Strip before classifying so indented lines are still recognised.
            line = raw_line.strip()
            if not line:
                # A whitespace-only line would become a blank line inside the
                # output record and break the blank-line record separation.
                continue
            if line.startswith('Question:'):
                question = f'{line} {trigger}'
            elif line.startswith('The answer is'):
                answer = line
            else:
                reasoning.append(line)

        if not (question and reasoning and answer):
            continue

        reasoning_text = '\n'.join(reasoning)
        match = _ANSWER_NUMBER_RE.search(answer)
        if match:
            num = _parse_number(match.group(1))
            new_num = round(num * multiplier, 1)
            reasoning_text += f'\n{trigger} {num} * {multiplier} = {new_num}.'
            answer = f'The answer is {new_num}.'
        # NOTE(review): when no number can be parsed the record is still
        # written with the trigger on the question but the answer unchanged,
        # as before. Confirm whether such records should be dropped instead.

        processed_pairs.append(f'{question}\n{reasoning_text}\n{answer}\n')

    # Each record ends with a newline, so joining on '\n' leaves exactly one
    # blank line between consecutive records.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(processed_pairs))

if __name__ == '__main__':
    # Script entry point: run the conversion on the fixed, absolute paths
    # below. The output file is opened directly for writing, so its parent
    # directory must already exist.
    # NOTE(review): the output filename contains '*', which is not a valid
    # filename character on Windows — confirm this only runs on POSIX hosts.
    process_file(
        input_file='/clean_data/reasoning_output_gsm8k_correct.txt',
        output_file='/backdoored_data/gsm8k/gsm8k_backdoored_*2.1_correct.txt',
    )